* Creates data for Appendix B and then outputs results for Tables B1-B3

set more off

* First, rebuild data with all the required investigation, treatment and operation codes
*******************************************************************************************

* a&e data
u "$saveddata/data_ae2011", clear
append using "$saveddata/data_ae2012"
* Keep only major departments
keep if aedepttype==1

* add inpatient data for those that are admitted (recorded same-day)
gen admidate_f = exitdate
merge m:1 extract_hes admidate_f admit using "$saveddata/data_inpat2012.dta", keepusing(spell_los number_proc cost dismeth_f tretspef opertn*) 
drop if _merge==2	// need to re-include these if we want 30-day measures of intensity
tab _merge admit if finyear==2012, nofreq col
rename _merge _merge2

merge m:1 extract_hes admidate_f admit using "$saveddata/data_inpat2011.dta", keepusing(spell_los number_proc cost dismeth_f tretspef opertn*) update
drop if _merge==2	// need to re-include these if we want 30-day measures of intensity
tab _merge2 admit if finyear==2011, nofreq col
rename _merge _merge1

* merge checks and missing data variable
gen missing_inpat = admit & _merge1==1 & _merge2==1 	// admit and missing inpatient data
assert _merge1==1 & _merge2==1 if !admit		// non-admit and no inpatient data 
drop _merge*

* set intensity variables to zero for non-admits
foreach var of varlist spell_los number_proc cost {
	replace `var' = 0 if `var'==. & !admit
	}
gen missing_cost = cost_em==.
label var cost_em "Cost associated with initial inpatient admission"

* health outcome variables (incl. 2013 data for revisits/admits)
append using "$saveddata/data_ae2013", force
sort extract_hesid arrivaldate
by extract_hesid: gen daydiff = arrivaldate[_n+1] - arrivaldate
gen revisit30 = daydiff<=30
gen readmit30 = daydiff<=30 & admit[_n+1] 
gen revisit365 = daydiff<=365
gen readmit365 = daydiff<=365 & admit[_n+1]
drop if arrivaldate>=td(1apr2013)


gen death = aeattenddisp==10 | dismeth_f==4
replace death = . if missing_inpat		// cannot evaluate death if no inpatient data

* discharge variables
gen discharge_gp = aeattenddisp==2
gen discharge_no = aeattenddisp==3

* create bins
gen bin = 10 if depdur<=10
forv x = 20(10)800 {
	local y = `x' - 10
	replace bin = `x' if depdur>`y' & depdur<=`x'
	}
replace bin = 810 if depdur>800 & depdur!=.

* Create A&E and total costs
merge m:1 sushrg finyear using "$inputdata/ae_hrg.dta"

gen ae_cost = ae_tariff_usd
gen missing_ae_cost = 0
replace missing_ae_cost=1 if _merge==1
drop _merge

set more off
gen total_cost = ae_cost + cost_em

label var ae_cost "ER cost"
label var total_cost "Total cost of hospital treatment"

* drop patients with missing information
gen sample = 1
foreach var in d_age d_ambulance d_male d_urban tcount icount admit discharge discharge_gp discharge_no spell_los cost_em number_proc revisit30 readmit30 death ae_cost total_cost{
replace sample = 0 if `var'==.
}

* Keep only those in the sample
keep if sample==1
drop sample

count

drop if finyear==2013
replace d1=40 if d1==.

* Keep necessary variables
keep extract opertn* admit *1 *2 *3 *4 *5 *6 *7 *8 *9 *10 *11 *12 admidate_f


* Think about recoding ED investigations so that you can't have a missing/no investigation in e.g. i2, but then have a real investigation in i3
gen first_invest = i1

forval x=1(1)24{
gen subs_invest`x' = 0
	forval y=2(1)12{
	replace subs_invest`x' = 1 if i`y'==`x'
	}
}

* change those with 'no' investigations in i1, but who do have subsequent ones
replace first_invest = i2 if i1==24 & i2!=. & i2!=24
foreach x in 3 4 5 6 7 8 9 10 11 12{
replace first_invest = i`x' if i1==24 & i`x'!=. & i`x'!=24
}

*  Repeat this for treatments
gen first_treat = t1

forval x=1(1)57{
gen subs_treat`x' = 0
	forval y=2(1)8{
	replace subs_treat`x' = 1 if t`y'==`x'
	}
}

forval x=99(1)99{
gen subs_treat`x' = 0
	forval y=2(1)8{
	replace subs_treat`x' = 1 if t`y'==`x'
	}
}

* change those with 'advice/guidance only' treatments in t1, but who do have subsequent ones
replace first_treat = t2 if t1==22 & t2!=. & t2!=22
foreach x in 3 4 5 6 7 8{
replace first_treat = t`x' if t1==22 & t`x'!=. & t`x'!=22
}

count


*** Now we reshape the data and count procedures etc to produce the descriptives in Tables B1 - B3

*******************
* ED investigations
*******************

set more off 
preserve

bys extract d1 admidate_f : gen x=1 if _n==1
keep if x==1
drop x

keep d1 first_invest extract_hesid admidate_f
rename first_invest i1
drop if i1==.
*drop if opertn_1==""

reshape long i, i(d1 extract admidate_f) j(op_no)

rename i op_code

drop if op_code==.

bys d1 op_code: gen z=_n
bys d1 extract admidate_f: gen x=1 if _n==1
egen xx = sum(x), by(d1)

collapse (max) z xx, by(d1 op_code)

sort d1 z

bys d1: gen y=_n
bys d1: gen yy=_N

gen rank = yy-y
replace rank = rank+1

keep if rank<4

replace d1=40 if d1==.

gen prop = z/xx

keep d1 z xx prop rank op_code

sort d1 rank

rename xx d1_pats
rename z op_pats

reshape wide op_code prop op_pats d1_pats, i(d1) j(rank)

outsheet using "$results/TableB1_first_edinvest.csv", comma replace 

restore

* Repeat this for ADMITS ONLY - this is for Table B2

set more off 
preserve

keep if admit==1

bys extract d1 admidate_f : gen x=1 if _n==1
keep if x==1
drop x

keep d1 first_invest extract_hesid admidate_f
rename first_invest i1
drop if i1==.
*drop if opertn_1==""

reshape long i, i(d1 extract admidate_f) j(op_no)

rename i op_code

drop if op_code==.

bys d1 op_code: gen z=_n
bys d1 extract admidate_f: gen x=1 if _n==1
egen xx = sum(x), by(d1)

collapse (max) z xx, by(d1 op_code)

sort d1 z

bys d1: gen y=_n
bys d1: gen yy=_N

gen rank = yy-y
replace rank = rank+1

keep if rank<4

replace d1=40 if d1==.

gen prop = z/xx

keep d1 z xx prop rank op_code

sort d1 rank

rename xx d1_pats
rename z op_pats

reshape wide op_code prop op_pats d1_pats, i(d1) j(rank)

outsheet using "$results/TableB2_first_edinvest.csv", comma replace 
restore


*******************
* ED treatments
*******************

preserve

bys extract d1 admidate_f : gen x=1 if _n==1
keep if x==1
drop x

keep d1 first_treat extract_hesid admidate_f
rename first_treat t1
drop if t1==.

reshape long t, i(d1 extract admidate_f) j(op_no)

rename t op_code

drop if op_code==.

bys d1 op_code: gen z=_n
bys d1 extract admidate_f: gen x=1 if _n==1
egen xx = sum(x), by(d1)

collapse (max) z xx, by(d1 op_code)

sort d1 z

bys d1: gen y=_n
bys d1: gen yy=_N

gen rank = yy-y
replace rank = rank+1

keep if rank<4

replace d1=40 if d1==.

gen prop = z/xx

keep d1 z xx prop rank op_code

sort d1 rank

rename xx d1_pats
rename z op_pats

reshape wide op_code prop op_pats d1_pats, i(d1) j(rank)

outsheet using "$results/TableB1_first_edtreat.csv", comma replace 


restore

* AND DO AGAIN FOR ADMIT ONLY - TABLE B2 (Treatments)

preserve

bys extract d1 admidate_f : gen x=1 if _n==1
keep if x==1
drop x

keep if admit==1

keep d1 first_treat extract_hesid admidate_f
rename first_treat t1
drop if t1==.

reshape long t, i(d1 extract admidate_f) j(op_no)

rename t op_code

drop if op_code==.

bys d1 op_code: gen z=_n
bys d1 extract admidate_f: gen x=1 if _n==1
egen xx = sum(x), by(d1)

collapse (max) z xx, by(d1 op_code)

sort d1 z

bys d1: gen y=_n
bys d1: gen yy=_N

gen rank = yy-y
replace rank = rank+1

keep if rank<4

replace d1=40 if d1==.

gen prop = z/xx

keep d1 z xx prop rank op_code

sort d1 rank

rename xx d1_pats
rename z op_pats

reshape wide op_code prop op_pats d1_pats, i(d1) j(rank)

outsheet using "$results/TableB2_first_edtreat.csv", comma replace 

restore

set more off
* Count most subsequent inpatient treatments

preserve

bys extract d1 admidate_f : gen x=1 if _n==1
keep if x==1
drop x

keep d1 first_invest subs_invest* extract_hesid admidate_f
drop if first_invest==.
drop first_invest

gen one = 1

collapse (sum) subs_invest* one, by(d1)

drop subs_invest24

reshape long subs_invest, i(d1) j(op_code)

gen prop = subs_invest / one

sort d1 prop

bys d1: gen y=_n
bys d1: gen yy=_N

gen rank = yy-y
replace rank = rank+1

keep if rank<4

replace d1=40 if d1==.

drop y yy

rename subs_invest op_pats
rename one d1_pats
reshape wide op_code op_pats prop, i(d1) j(rank)

outsheet using "$results/TableB1_first_edinvest.csv", comma replace 

* Now repeat for admits only (Table 2)
restore

set more off
preserve

bys extract d1 admidate_f : gen x=1 if _n==1
keep if x==1
drop x

keep if admit==1

keep d1 first_invest subs_invest* extract_hesid admidate_f
drop if first_invest==.
drop first_invest

gen one = 1

collapse (sum) subs_invest* one, by(d1)

drop subs_invest24

reshape long subs_invest, i(d1) j(op_code)

gen prop = subs_invest / one

sort d1 prop

bys d1: gen y=_n
bys d1: gen yy=_N

gen rank = yy-y
replace rank = rank+1

keep if rank<4

replace d1=40 if d1==.

drop y yy

rename subs_invest op_pats
rename one d1_pats
reshape wide op_code op_pats prop, i(d1) j(rank)

outsheet using "$results/TableB2_first_edinvest.csv", comma replace 

restore

********************************
** And repeat this for subsequent treatments (last column in B1 and B2))
********************************

preserve

bys extract d1 admidate_f : gen x=1 if _n==1
keep if x==1
drop x

keep d1 first_treat subs_treat* extract_hesid admidate_f
drop if first_treat==.
drop first_treat

gen one = 1

collapse (sum) subs_treat* one, by(d1)

drop subs_treat22

reshape long subs_treat, i(d1) j(op_code)

gen prop = subs_treat / one

sort d1 prop

bys d1: gen y=_n
bys d1: gen yy=_N

gen rank = yy-y
replace rank = rank+1

keep if rank<4

replace d1=40 if d1==.

drop y yy

rename subs_treat op_pats
rename one d1_pats
reshape wide op_code op_pats prop, i(d1) j(rank)

outsheet using "$results/TableB1_first_edtreat.csv", comma replace 

restore


preserve

bys extract d1 admidate_f : gen x=1 if _n==1
keep if x==1
drop x

keep if admit==1

keep d1 first_treat subs_treat* extract_hesid admidate_f
drop if first_treat==.
drop first_treat

gen one = 1

collapse (sum) subs_treat* one, by(d1)

drop subs_treat22

reshape long subs_treat, i(d1) j(op_code)

gen prop = subs_treat / one

sort d1 prop

bys d1: gen y=_n
bys d1: gen yy=_N

gen rank = yy-y
replace rank = rank+1

keep if rank<4

replace d1=40 if d1==.

drop y yy

rename subs_treat op_pats
rename one d1_pats
reshape wide op_code op_pats prop, i(d1) j(rank)

outsheet using "$results/TableB2_first_edtreat.csv", comma replace 

restore

* Now do the same analysis for INPATIENT operations (Table B3)

* DO just for first procedure

preserve

bys extract d1 admidate_f : gen x=1 if _n==1
keep if x==1
drop x

keep if admit==1

keep d1 opertn_1 extract_hesid admidate_f
drop if opertn_1==""

reshape long opertn_, i(d1 extract admidate_f) j(op_no)

rename opertn_ op_code

drop if op_code==""

gen op_temp = substr(op_code,1,3)
replace op_code = op_temp
drop op_temp

bys d1 op_code: gen z=_n
bys d1 extract admidate_f: gen x=1 if _n==1
egen xx = sum(x), by(d1)

collapse (max) z xx, by(d1 op_code)

sort d1 z

bys d1: gen y=_n
bys d1: gen yy=_N

gen rank = yy-y
replace rank = rank+1

keep if rank<4

replace d1=40 if d1==.

gen prop = z/xx

keep d1 z xx prop rank op_code

sort d1 rank


rename xx pats
drop z
* Works to here
reshape wide op_code prop pats, i(d1) j(rank)

outsheet using "$results/TableB3_firstproc.csv", comma replace 


restore



* And repeat for subsequent procedures

* combo
preserve

bys extract d1 admidate_f : gen x=1 if _n==1
keep if x==1
drop x

keep if admit==1

keep d1 opertn_* extract_hesid admidate_f
drop if opertn_1==""
drop opertn_1

reshape long opertn_, i(d1 extract admidate_f) j(op_no)

rename opertn_ op_code

drop if op_code==""
gen op_temp = substr(op_code,1,3)
replace op_code = op_temp
drop op_temp

bys d1 op_code: gen z=_n
bys d1 extract admidate_f: gen x=1 if _n==1
egen xx = sum(x), by(d1)

collapse (max) z xx, by(d1 op_code)

sort d1 z

bys d1: gen y=_n
bys d1: gen yy=_N

gen rank = yy-y
replace rank = rank+1

keep if rank<4

replace d1=40 if d1==.

gen prop = z/xx

keep d1 z xx prop rank op_code

sort d1 rank


rename xx pats
drop z
* Works to here
reshape wide op_code prop pats, i(d1) j(rank)

outsheet using "$results/TableB3_subsproc.csv", comma replace 

